home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/perl
- #/****************************************************
- #**
- #** SOURCE NAME | getnews, (Get News)
- #** |
- #** SYNOPSIS | getnews [-h hostname] [-p port] [-n cfgfile] [-W timeout]
- #** |
- #** DESCRIPTION | getnews goes to a specified NNTP server
- #** | and saves new news articles
- #** | into directory ./groups/<newsgroup>/
- #** | Please see the NOTES section.
- #** |
- #** CHANGES | Programmer: Date: Reason/Comments
- #** | Jeffrey B. McGough 07-27-92 VERSION 2.0 (pgnews)
- #** | Pavel Klark 03-26-94 VERSION 1.0 (getnews)
- #** |
- #** NOTES | getnews needs a file named getnews.cfg to read
- #** | its newsgroup and last message number from.
- #** | getnews.cfg format is:
- #** | newsgroup number
- #** | Example:
- #** | comp.unix.wizards 7800
- #** | comp.unix.shell 3203
- #** | comp.unix.questions 546
- #** |
- #** | getnews is able to process trn-style kill-file commands:
- #** | it locates your kill-file in directory $HOME/News,
- #** | (See variable $kill_location below).
- #** | and expects all commands to be of format
- #** | /pattern/some-commands. Article is killed (doesn't get
- #** | archived) if any header line matches the pattern.
- #** |
- #** AUTHORS | Jeffrey B. McGough mcgough@wrdis01.af.mil
- #** | Pavel Clark, paul@cs.arizona.edu
- #** |
- #****************************************************/
-
- unshift(@INC,'/usr1/paul/lib/perl');
- $gzip = "/usr/local/bin/gzip -f";
-
- $GLIMPSEIDX_LOC='/usr/paul/bin/glimpseindex';
- # Your kill-file top directory, the following is trn's default
- $kill_location = $ENV{'HOME'} . "/News";
-
- require 'sys/socket.ph'; # The way I coded the sockets is this necessary?
- require 'getopts.pl';
- # -p portnumber : Port to connect to; default 119
- # -h host : Server host to connect to
- # -n getnews : Name of getnews file; default getnews.cfg
- # -W timeout : Timeout wait period for response, sec.; default 900 (= 15min)
- $opt_h = $ENV{'NNTPSERVER'};
- $opt_h = 'cs.arizona.edu' unless $ENV{'NNTPSERVER'};
- $opt_p = 119;
- $opt_n = 'getnews.cfg';
- $opt_W = 900;
- &Getopts ('h:p:n:W:');
-
- $VERSION = '2.0';
-
- $port = $opt_p; # For NNTP
- # HOSTNAME for the server...
- $host = $opt_h;
- # Pack format...
- $sockaddr = 'S n a4 x8';
-
- $waittime = $opt_W;
-
- $DOMAIN = &AF_INET;
- $STYLE = &SOCK_STREAM;
-
- $newsfile = $opt_n;
- $nnewsfile = "${opt_n}.new";
- $newarticles = "groups/newarticles";
-
- $rin = $rout = '';
-
- ($name, $aliases, $proto) = getprotobyname('tcp');
- ($name, $aliases, $type, $len, $hostaddr) = gethostbyname($host);
-
- $sock = pack($sockaddr, $DOMAIN, $port, $hostaddr);
-
- socket(S, $DOMAIN, $STYLE, $proto) || die $!;
- connect(S, $sock) || die $!;
- select(S); $| = 1; select(STDOUT);
- #set up for select
- vec($rin, fileno(S), 1) = 1;
- #this select will block until the server gives us something.
- $nfound = select($rout=$rin, undef, undef, $waittime);
- if ($nfound == 0)
- {
- print "Socket timed out...";
- exit 1;
- }
- $_ = <S>; #Read one line to see if we got a good connection.
- if (!/^2../)
- {
- print;
- die "Service unavailable";
- }
- open(GRP, "<$newsfile") || die "Could not open $newsfile: $!";
- open(NGRP, ">$nnewsfile") || die "Could not open $nnewsfile: $!";
- open(IDXFILE, ">>$newarticles");
- $totalcount = 0;
- select(NGRP); $| = 1; select(STDOUT);
- group: while(<GRP>) {
- if (/^#/) {
- # leave the comment as is
- print NGRP $_;
- next group;
- }
- chop;
- ($grp, $lgot) = split;
- print(S "group $grp\n");
- #this select will block until the server gives us something.
- $nfound = select($rout=$rin, undef, undef, $waittime);
- if ($nfound == 0)
- {
- print "Socket timed out...";
- exit 1;
- }
- $_ = <S>; #Make sure the group change worked...
- ($stat, $num, $first, $last) = split;
- if( $stat !~ /^2../ )
- {
- warn "Bad group $grp: $_";
- print(NGRP "$grp $lgot\n");
- next group;
- }
- #
- # create group directories, if necessary
- #
- -d "groups" || mkdir("groups",0777) ||
- die "Couldn't create directory groups: $!" ;
- -d "groups/$grp" || mkdir("groups/$grp",0777) ||
- die "Couldn't create directory groups/$grp: $!" ;
- -d "indices" || mkdir("indices",0777) ||
- die "Couldn't create directory indices: $!" ;
- -d "indices/$grp" || mkdir("indices/$grp",0777) ||
- die "Couldn't create directory indices/$grp: $!" ;
- #
- # access kill-file (in directory $HOME/News)
- #
- $dir = $kill_location;
- $killfile = $grp;
- $killfile =~ s|\.|/|g;
- $killfile = "$dir/$killfile/KILL";
- if (open(KILL, $killfile))
- {
- @karray = ();
- while (<KILL>) {
- ($dummy,$pattern) = split(m|/|);
- push(@karray,$pattern) if $pattern;
- }
- } else {
- $killfile = undef;
- }
- close(KILL);
- if ( $first > $lgot )
- {
- $lgot = $first;
- }
- $count = 0;
- if ( $lgot < $last )
- {
- article: foreach $art ($lgot..$last)
- {
- print(S "article $art\n");
- #this select will block until the server gives us something.
- $nfound = select($rout=$rin, undef, undef, $waittime);
- if ($nfound == 0)
- {
- print "Socket timed out...";
- exit 1;
- }
- $_ = <S>; #get error if one exists
- if(!/^2../)
- {
- warn "No article $art in $grp\n";
- next article;
- }
- # We now slurp the whole article into the array article...
- # HMMM is this good or bad...
- # It gives me the WILLIES [:^) Jeffrey B. McGough
- @article = ();
- do {
- # The next few lines have been commented out because they don't work
- # JBM 07-27-92
- # $nfound = select($rout=$rin, undef, undef, $waittime);
- # if ($nfound == 0)
- # {
- # print "Socket timed out...";
- # exit 1;
- # }
- $lgot = $art;
- $_ = <S>;
- s/\r//g;
- if( $_ ne ".\n") {
- push(@article,$_);
- } else {
- push(@article,"\n");
- }
- } until $_ eq ".\n";
- if ( !&desc ) {
- # header matches kill-file
- next article;
- }
- ++$count;
- ++$totalcount;
- }
- } else {
- $lgot -= 1;
- }
- $lgot += 1;
- print(NGRP "$grp $lgot\n");
- print "$grp: $count new articles\n";
- if ($count>0) {
- $cmd = "exec $GLIMPSEIDX_LOC -o -z -H indices/$grp ".
- "groups/$grp >/dev/null";
- system "$cmd";
- }
- }
- close(NGRP);
- close(GRP);
- close(IDXFILE);
- if ($totalcount>0) {
- $cmd = "build_idx &";
- system "$cmd";
- } else {
- unlink("$newarticles");
- }
- rename ($newsfile, "$newsfile.old") ||
- warn ("Unable to rename $newsfile to ${newsfile}.old\n");
- rename ($nnewsfile, $newsfile) ||
- warn ("Unable to rename ${nnewsfile} to ${newsfile}\n");
- print( S "quit\n");
- close(S);
-
- # We parse through @article to extract header information
- # and then save the article
- # Returns article no, or empty string if article is to be killed
- sub desc
- {
- local($pattern,$author,$subject,$ID,$date,$filename);
- # global parameters: $grp, $art, @article, @karray
-
- scan: foreach (@article) {
- last scan if /^\n$/;
- foreach $pattern (@karray) {
- if (/$pattern/i) {
- return undef;
- }
- }
- s/\s+/ /;
- if ( /^From: (.*)/ ) {
- $author = $1;
- if ($author =~ /([\w\d][-+\w\d.]*@[\w\d][-\w\d.]*)/) {
- $address = $1;
- } else {
- $address = $author;
- }
- } elsif (/^Message-ID: \<?([^\s\>]*)/) {
- $ID=$1;
- } elsif (/^Subject: (.*)/) {
- $subject=$1;
- } elsif (/^Date: (.*)/) {
- $date=$1;
- }
- }
- # good article, now open output file...
- $filename = "/$grp/$art";
- $file = "groups$filename";
- open(OUTFILE, ">$file") ||
- die "Could not open $file";
- print OUTFILE @article;
- close(OUTFILE);
- if ($grp =~ /soc.culture/) {
- system("$gzip $file");
- $filename .= ".gz";
- }
- # Write header information
- print IDXFILE "$filename\t$ID\t$address\t$author\t$subject\t$date\n";
- return $art;
- }
-